"""ICU-based collation.

This collation backend uses the International Components for Unicode
library to provide accurate and high-performance collation. It
supports multiple locales and advanced sorting capabilities.

Use this collation backend if possible; it's by far the best.

Avoid this backend if...
 - ICU is not available for your system.

"""

__all__ = ["Collator"]

import collate._abcollator
import collate._locale
import collate.errors

from collate.icu import _icu

class Collator(collate._abcollator.Collator):
    """Collator whose sort keys and word splitting come from ICU.

    Both operations are delegated to objects created from the _icu
    module: an _icu.Collator for keys and an _icu.WordBreaker for
    word segmentation.
    """

    def __init__(self, locale, encoding=None):
        """Set up the ICU collator and word breaker for a locale.

        The locale/encoding pair is first resolved through
        collate._locale.getpair. A resolved locale of "C" is requested
        from ICU under the name "root".

        After construction, the 'locale' attribute holds the locale
        reported by the ICU collator and the 'encoding' attribute holds
        the result of collate._locale.encoding for the resolved
        encoding.

        Raises collate.errors.InvalidLocaleError if the ICU collator
        reports used_default_information for a locale other than "C".
        """
        super(Collator, self).__init__(locale, encoding)
        resolved_locale, resolved_encoding = collate._locale.getpair(
            locale, encoding)

        # The "C" locale is handed to ICU as "root".
        if resolved_locale == "C":
            icu_name = "root"
        else:
            icu_name = resolved_locale

        self._collator = _icu.Collator(icu_name)
        self.locale = self._collator.locale
        self.encoding = collate._locale.encoding(resolved_encoding)

        # NOTE(review): used_default_information presumably means ICU
        # had no data for the requested locale and fell back to its
        # defaults -- confirm against the _icu binding. That outcome is
        # only acceptable for "C", which asked for "root" on purpose.
        if self._collator.used_default_information:
            if resolved_locale != "C":
                raise collate.errors.InvalidLocaleError(resolved_locale)

        try:
            breaker = _icu.WordBreaker(icu_name)
        except ValueError:
            # Thai is the only language with a special break locale,
            # so this is a harmless error.
            breaker = _icu.WordBreaker("root")
        self._breaker = breaker

    def words(self, string):
        """Split the string along word boundaries.

        The string is passed through self.unicode() before being given
        to the ICU word breaker.
        """
        return self._breaker.words(self.unicode(string))

    def key(self, string):
        """Return the ICU sort key for a string.

        A str instance is decoded to a unicode instance according to
        the 'encoding' attribute of the Collator before the key is
        computed.
        """
        return self._collator.key(self.unicode(string))

